import numpy as np
import pandas as pd
from aip import AipNlp
import time
from fuzzywuzzy import fuzz

# Load the channel-data workbook; the path is relative to the working directory.
data = pd.read_excel(r'20210720建设银行渠道数据更新底稿.xlsx')
# Keep only rows whose 匹配准确率 equals one of the two rates below. Each rate is
# filtered separately and the pieces are stacked in this order, so all '70.7%'
# rows come before the '66.7%' rows in t_data.
t_data = pd.concat(
    [data[data['匹配准确率'] == rate] for rate in ('70.7%', '66.7%')]
)
# Substrings stripped from NAME2 to obtain the bare city name.
_CITY_NOISE_TOKENS = ('市', '分行', '支行', '小微', '营业部', '自治区', '地区')

# Substrings stripped from a branch name before it is compared. The order is
# significant: replacements run one after another, so removing one token can
# bring characters together that a later token then matches.
_BRANCH_NOISE_TOKENS = (
    '市', '小微', '自治区', '装修中', '个人信用征信报告查询点',
    "专柜", "储蓄", "分理处", "营业", "会计", "支行",
)


def _strip_tokens(text, tokens):
    """Return *text* with every substring in *tokens* removed, in order."""
    for token in tokens:
        text = text.replace(token, '')
    return text


def f_fuzz(x):
    """Score how closely a row's two branch names match.

    Args:
        x: A row-like mapping with string values under the keys 'NAME2',
            'NAME3' and '原表支行名称对照'. NAME2 is presumably the
            city/parent-branch name -- confirm against the workbook.

    Returns:
        A three-element list of fuzzywuzzy scores:
        [0] ``fuzz.ratio`` of the two cleaned names,
        [1] ``fuzz.partial_ratio`` of the two cleaned names,
        [2] ``fuzz.partial_ratio`` of the two raw, uncleaned names.

    Raises:
        AttributeError: If any of the three cells is not a string (for
            example a float), because ``.replace`` is called on it directly.
    """
    city_name = _strip_tokens(x['NAME2'], _CITY_NOISE_TOKENS)

    # The city name is removed first, then the generic filler words, using the
    # same sequence for both columns so they are normalised identically.
    branch_tokens = (city_name,) + _BRANCH_NOISE_TOKENS
    text1 = _strip_tokens(x['NAME3'], branch_tokens)
    text2 = _strip_tokens(x['原表支行名称对照'], branch_tokens)

    return [
        fuzz.ratio(text1, text2),
        fuzz.partial_ratio(text1, text2),
        # Raw comparison keeps the city name and filler words in play.
        fuzz.partial_ratio(x['NAME3'], x['原表支行名称对照']),
    ]

t_data1 = t_data.copy()
# Score every row exactly once. f_fuzz returns a three-element list per row
# ([ratio, partial_ratio, raw partial_ratio]); running the row-wise apply
# separately for each output column would redo the same work three times.
_fuzz_scores = t_data.apply(f_fuzz, axis=1)
t_data1['fuzz_ratio'] = _fuzz_scores.apply(lambda x: x[0])
t_data1['fuzz_partial_ratio'] = _fuzz_scores.apply(lambda x: x[1])
# NOTE: the 'orginal' spelling is kept on purpose -- it is the column name
# written to the output workbook, so renaming it would change the schema.
t_data1['fuzz_orginal_partial_ratio'] = _fuzz_scores.apply(lambda x: x[2])


# def bd_aip1(x):
#
#     city_name = x['NAME2']
#     text1 = x['NAME3']
#     text2 = x['原表支行名称对照']
#     score1 = fuzz.ratio(text1, text2)
#     score2 = fuzz.partial_ratio(text1, text2)
#     score3 = fuzz.token_sort_ratio(text1, text2)
#     score4 = fuzz.token_set_ratio(text1, text2)
#     total_list = [score1, score2, score3, score4]
#     return total_list
# t_data1['fuzz_ratio1'] = t_data.apply(bd_aip1,axis=1).apply(lambda x : x[0])
# t_data1['fuzz_partial_ratio1'] = t_data.apply(bd_aip1,axis=1).apply(lambda x : x[1])
# t_data1['fuzz_token_sort_ratio1'] = t_data.apply(bd_aip1,axis=1).apply(lambda x : x[2])
# t_data1['fuzz_token_set_ratio1'] = t_data.apply(bd_aip1,axis=1).apply(lambda x : x[3])
# Write the filtered rows together with their fuzzy-match score columns to an
# Excel workbook in the current working directory.
t_data1.to_excel(excel_writer=r"a5.xlsx")
# def bd_aip1(x):
#     city_name = x['NAME2'].replace('市', '').replace('分行', '').replace('支行', '').replace('小微', '').replace(
#         '营业部', '').replace('自治区', '').replace('地区','')
#     text1 = x['NAME3'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     text2 = x['原表支行名称对照'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     # score1 = fuzz.ratio(text1, text2)
#     score2 = fuzz.partial_ratio(text1,text2)
#     # score3 = fuzz.token_sort_ratio(text1,text2)
#     # score4 = fuzz.token_set_ratio(text1,text2)
#     # total_list = [score1,score2,score3,score4]
#     return score2
# t_data1['fuzz_partial_ratio'] = t_data.apply(bd_aip1,axis=1)
# def bd_aip2(x):
#
#     city_name = x['NAME2'].replace('市', '').replace('分行', '').replace('支行', '').replace('小微', '').replace(
#         '营业部', '').replace('自治区', '').replace('地区','')
#     text1 = x['NAME3'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     text2 = x['原表支行名称对照'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     # score1 = fuzz.ratio(text1, text2)
#     # score2 = fuzz.partial_ratio(text1,text2)
#     score3 = fuzz.token_sort_ratio(text1,text2)
#     # score4 = fuzz.token_set_ratio(text1,text2)
#     # total_list = [score1,score2,score3,score4]
#     return score3
#
# t_data1['fuzz_token_sort_ratio'] = t_data.apply(bd_aip2,axis=1)
#
# def bd_aip3(x):
#
#     city_name = x['NAME2'].replace('市', '').replace('分行', '').replace('支行', '').replace('小微', '').replace(
#         '营业部', '').replace('自治区', '').replace('地区','')
#     text1 = x['NAME3'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     text2 = x['原表支行名称对照'].replace(city_name,'').replace('市','').replace('小微','').replace('自治区','').replace('装修中','').replace('个人信用征信报告查询点','').replace("专柜","").replace("储蓄","").replace("分理处","").replace("营业","").replace("会计","").replace("支行",'')
#     # score1 = fuzz.ratio(text1, text2)
#     # score2 = fuzz.partial_ratio(text1,text2)
#     # score3 = fuzz.token_sort_ratio(text1,text2)
#     score4 = fuzz.token_set_ratio(text1,text2)
#     # total_list = [score1,score2,score3,score4]
#     return score4
# t_data1['fuzz_token_set_ratio'] = t_data.apply(bd_aip3,axis=1)
# t_data1.to_excel(r"a5.xlsx")


# score2 = fuzz.partial_ratio(text1, text2)
# score3 = fuzz.token_sort_ratio(text1, text2)
# score4 = fuzz.token_set_ratio(text1, text2)
# total_list = [score1, score2, score3, score4]